package com.apobates.forum.utils.sensitive;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.TreeMap;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.hankcs.algorithm.AhoCorasickDoubleArrayTrie;
/**
 * Sensitive-word filter: masks every dictionary hit found in a piece of content with {@code *}.
 * <p>
 * Matching is delegated to
 * <a href="https://github.com/hankcs/AhoCorasickDoubleArrayTrie">AhoCorasickDoubleArrayTrie</a>.
 * The {@code src} attributes of {@code <img>} tags are backed up before masking and restored
 * afterwards, so image addresses are not corrupted by the filter.
 * <p>
 * The filter is a lazily created singleton, obtained through {@link #getInstance(String)}.
 * 
 * @author xiaofanku
 * @since 20191209
 */
public class SensitiveWordFilter {
	/** Lazily created singleton; {@code volatile} so that the unsynchronized first check in {@link #getInstance(String)} never observes a partially constructed instance. */
	private static volatile SensitiveWordFilter instance=null;
	/** Trie built from the dictionary, or {@code null} when the dictionary file contained no lines. */
	private final AhoCorasickDoubleArrayTrie<String> arrayTrie;
	
	/**
	 * Loads the dictionary (one word per line, UTF-8) and builds the matching trie.
	 * 
	 * @param dictionaryFilePath location of the sensitive-word dictionary
	 * @throws IOException if the file cannot be opened or read
	 */
	private SensitiveWordFilter(String dictionaryFilePath) throws IOException{
		TreeMap<String, String> data = new TreeMap<>();
		// InputStreamReader substitutes malformed byte sequences instead of throwing, and
		// FileInputStream reports a missing file as FileNotFoundException, so the failure
		// modes match the previous RandomAccessFile-based loading; only the buffering is new.
		try(BufferedReader reader = new BufferedReader(
				new InputStreamReader(new FileInputStream(dictionaryFilePath), StandardCharsets.UTF_8))){
			boolean firstLine = true;
			String line;
			while((line=reader.readLine())!=null){
				if(firstLine){
					firstLine = false;
					// A UTF-8 byte-order mark (U+FEFF) would otherwise become part of the
					// first word and prevent it from ever matching.
					if(!line.isEmpty() && line.charAt(0)=='\uFEFF'){
						line = line.substring(1);
					}
				}
				data.put(line, line);
			}
		}
		if(!data.isEmpty()){
			AhoCorasickDoubleArrayTrie<String> struct = new AhoCorasickDoubleArrayTrie<>();
			struct.build(data);
			arrayTrie = struct;
		}else{
			arrayTrie = null;
		}
	}
	
	/**
	 * Returns the shared filter instance, creating it on the first call.
	 * <p>
	 * The dictionary is loaded only once: {@code dictionaryFilePath} is used by the call that
	 * creates the instance and is ignored by every later call.
	 * 
	 * @param dictionaryFilePath location of the sensitive-word dictionary
	 * @return the shared filter instance
	 * @throws IOException if the instance has to be created and the dictionary cannot be read
	 */
	public static SensitiveWordFilter getInstance(String dictionaryFilePath) throws IOException {
		SensitiveWordFilter current = instance;
		if (null == current) {
			synchronized (SensitiveWordFilter.class) {
				current = instance;
				if (null == current){
					current = new SensitiveWordFilter(dictionaryFilePath);
					instance = current;
				}
			}
		}
		return current;
	}
	
	/**
	 * Masks every sensitive word in the content with {@code *} characters.
	 * <p>
	 * When the content contains {@code <img src>} tags, the result is re-serialized from the
	 * parsed document body after the original {@code src} values have been restored; otherwise
	 * the masked text is returned as is.
	 * 
	 * @param content the content to check
	 * @return the content with every hit replaced by {@code *}
	 * @throws NullPointerException if {@code content} is null, or if the dictionary contained
	 *         no words so that no trie could be built
	 */
	public String execute(String content)throws NullPointerException{
		Objects.requireNonNull(content, "content");
		Objects.requireNonNull(arrayTrie, "sensitive word dictionary is empty");
		// Back up the image addresses before anything is masked.
		ImageShelter shelter = new ImageShelter(content);
		// Hits are computed on the untouched input; masking keeps the length unchanged,
		// so the hit offsets stay valid for the builder.
		StringBuilder masked = new StringBuilder(content);
		List<AhoCorasickDoubleArrayTrie.Hit<String>> hits = arrayTrie.parseText(content);
		for (AhoCorasickDoubleArrayTrie.Hit<String> hit : hits) {
			for (int i = hit.begin; i < hit.end; i++) {
				masked.setCharAt(i, '*');
			}
		}
		// Compare against the backup and restore damaged image addresses.
		return shelter.protection(masked.toString());
	}
	
	/**
	 * Backs up the {@code src} of every {@code <img>} tag in the original content so it can be
	 * put back after masking (backup, compare, replace).
	 */
	private static final class ImageShelter{
		/** Original image addresses, in document order. */
		private final List<String> originalSrc;
		
		/**
		 * @param content the unmasked content whose image addresses are recorded
		 */
		public ImageShelter(String content){
			List<String> data = new ArrayList<>();
			// Collect the address of every image in the content.
			Document doc = Jsoup.parse(content);
			Elements imgTags = doc.select("img[src]");
			for(Element element : imgTags){
				data.add(element.attr("src"));
			}
			originalSrc = Collections.unmodifiableList(data);
		}
		
		/**
		 * Restores the recorded image addresses in the masked content.
		 * <p>
		 * Images are paired with the backup by position. If masking changed the number of
		 * {@code <img src>} tags that can be parsed, only the leading tags that still have a
		 * counterpart are restored.
		 * 
		 * @param sensitiveResult the content after masking
		 * @return {@code sensitiveResult} unchanged when the original had no images, otherwise
		 *         the HTML of the parsed body with the image addresses restored
		 */
		public String protection(String sensitiveResult){
			if(originalSrc.isEmpty()){ // no images to protect
				return sensitiveResult;
			}
			Document doc = Jsoup.parse(sensitiveResult);
			Elements imgTags = doc.select("img[src]");
			
			// Never index past the backup, even if the masked markup yields extra tags.
			int restorable = Math.min(imgTags.size(), originalSrc.size());
			for(int i=0; i< restorable;i++){
				Element img = imgTags.get(i);
				String src = originalSrc.get(i);
				if(!img.attr("src").equals(src)){
					img.attr("src", src);
				}
			}
			return doc.body().html();
		}
	}
}
